########################################################################
#   Project: The Press-Safety Paradox of Democracies in Media Systems, #
#            Regime-type Durability and Journalist Killings            #
#   Author: Jonathan A. Solis                                          #
#   Task: Replication Materials for Foreign Policy Analysis article -  #
#         Survival Analysis (manuscript and appendix)                  #
#   R version: 3.6.0                                                   #
#   Last updated: 8/2/2019                                             #
########################################################################

#####################
# Survival Analysis #
#####################

#Create data for Table , Manuscript

rm(list=ls())

setwd("[working directory here]")

library(foreign)
library(survival)
library(norm)
library(ggplot2)
library(survminer)
library(GGally)
library(gmodels)
library(dplyr)
library(stargazer)

#                                                                                             #
# Data for Table 4, Manuscript:                                                               #
# Includes: Cox Proportional Hazards models 11-14, Therneau-Grambsch (TG) Tests, and          #
# calculations of Hazard rates.                                                               #
#                                                                                             #

#Load data
survival<-read.csv("Solis_fpa_july2019.csv") 

#Creat log of durability
survival <- survival %>%
  mutate(seq_ln = log(seq1))

#Check data
glimpse(survival)

#Stratify sample

#Autocracy
sa.auto <- subset(survival, durable2 == 0)
#Anocracy
sa.ano <- subset(survival, durable2 == 1)
#Democracy
sa.demo <- subset(survival, durable2 == 2)

############################
#####Full Sample############
############################

#Estimate model
cox.full <- coxph(Surv(X_t01, X_t1, X_d1) ~ seq_ln + polity2 + public_cor
                  + physical_vd + express_vd + intensity2 
                  + info + pop_ln + frailty(ccode), na.action=na.exclude, data =  survival, 
                  control=coxph.control(eps=1e-09,iter.max=100,outer.max=100))

cox.full

#                    coef  se(coef)       se2     Chisq   DF       p
#seq_ln          -0.23472   0.05918   0.05446  15.72981  1.0 7.3e-05
#polity2          0.01568   0.02009   0.01850   0.60927  1.0  0.4351
#public_cor       0.62676   0.40125   0.33144   2.43985  1.0  0.1183
#physical_vd     -3.51627   0.56603   0.48624  38.59077  1.0 5.2e-10
#express_vd       2.46761   0.62330   0.54747  15.67350  1.0 7.5e-05
#intensity2       0.83136   0.09116   0.08275  83.17630  1.0 < 2e-16
#info             0.01596   0.00572   0.00438   7.77567  1.0  0.0053
#pop_ln           0.43473   0.06368   0.04577  46.60485  1.0 8.7e-12
#frailty(ccode)                               168.42547 70.8 6.3e-10

#Iterations: 7 outer, 36 Newton-Raphson
#Variance of random effect= 0.643   I-likelihood = -1528.6 
#Degrees of freedom for terms=  0.8  0.8  0.7  0.7  0.8  0.8  0.6  0.5 70.8 
#Likelihood ratio test=953  on 76.6 df, p=<2e-16
#n= 3586, number of events= 384 
#(666 observations deleted due to missingness)

#T-G Test for proportionality 
cox.zph(cox.full, transform='log')

#                rho  chisq      p
#seq_ln       0.0172  0.189 0.6634
#polity2     -0.0430  1.198 0.2736
#public_cor   0.0286  0.760 0.3833
#physical_vd -0.0684  3.981 0.0460
#express_vd   0.0495  1.888 0.1695
#intensity2  -0.0774  4.305 0.0380
#info         0.0111  0.142 0.7063
#pop_ln      -0.0213  0.532 0.4659
#GLOBAL           NA 13.414 0.0984

#Graph Scoenfeld Residuals
ggcoxdiagnostics(cox.full, type = "schoenfeld",
                 linear.predictions = FALSE,ggtheme = theme_bw())

#Result: physical_vd and intensity2 violate proportionality assumption.
#Diagnostics: Overall, model should be good because Global (p >.05). 

#Proportional Hazard
coef <- summary(cox.full)$coefficients[1, 1]
coef
exp(coef)
#0.7907903
1-0.7907903
#0.2092097; ~21%

##########################
#####Autocracy############
##########################

#Estimate model
cox.auto <- coxph(Surv(X_t01, X_t1, X_d1) ~ seq_ln + polity2 + public_cor
                  + physical_vd + express_vd + intensity2 
                  + info + pop_ln + frailty(ccode), na.action=na.exclude, data =  sa.auto,      
                  control=coxph.control(eps=1e-09,iter.max=100,outer.max=100))

cox.auto

#                  coef se(coef)     se2   Chisq   DF       p
#seq_ln         -0.5059   0.2014  0.1716  6.3076 1.00   0.012
#polity2         0.1407   0.2322  0.2034  0.3668 1.00   0.545
#public_cor     -0.4225   1.2843  1.0454  0.1082 1.00   0.742
#physical_vd    -2.2895   1.4762  1.2980  2.4055 1.00   0.121
#express_vd      0.4007   1.6077  1.4128  0.0621 1.00   0.803
#intensity2      1.3437   0.2600  0.2363 26.7087 1.00 2.4e-07
#info            0.0297   0.0167  0.0136  3.1434 1.00   0.076
#pop_ln          0.2853   0.1525  0.1274  3.5001 1.00   0.061
#frailty(ccode)                          16.4199 9.19   0.064

#Iterations: 7 outer, 48 Newton-Raphson
#Variance of random effect= 0.488   I-likelihood = -103.6 
#Degrees of freedom for terms= 0.7 0.8 0.7 0.8 0.8 0.8 0.7 0.7 9.2 
#Likelihood ratio test=105  on 15.1 df, p=2e-15
#n= 597, number of events= 44 
#(116 observations deleted due to missingness)

#T-G Test for proportionality 
cox.zph(cox.auto, transform='log')

#               rho chisq     p
#seq_ln       0.0692 0.270 0.603
#polity2     -0.1649 1.553 0.213
#public_cor   0.0444 0.165 0.684
#physical_vd -0.1671 1.438 0.230
#express_vd   0.1532 1.315 0.251
#intensity2   0.0585 0.170 0.680
#info         0.0847 0.528 0.467
#pop_ln      -0.1556 1.439 0.230
#GLOBAL           NA 6.462 0.596

#Graph Scoenfeld Residuals
ggcoxdiagnostics(cox.auto, type = "schoenfeld",
                 linear.predictions = FALSE,ggtheme = theme_bw())

#Result: model/individual variables do not violate proportionality assumption
#Diagnostics: Overall, model should be good because Global (p >.05). 

#Proportional Hazard
coef <- summary(cox.auto)$coefficients[1, 1]
coef
exp(coef)
#0.6012533
1-0.6012533
#0.3987467; ~40%

#########################
#####Anocracy############
#########################

#Estimate model
cox.ano <- coxph(Surv(X_t01, X_t1, X_d1) ~ seq_ln + polity2 + public_cor
                 + physical_vd + express_vd + intensity2 
                 + info + pop_ln + frailty(ccode), na.action=na.exclude, data =  sa.ano, 
                 control=coxph.control(eps=1e-09,iter.max=100,outer.max=100))

cox.ano

#                   coef se(coef)      se2    Chisq DF       p
#seq_ln         -0.27733  0.10430  0.09772  7.06958  1 0.00784
#polity2         0.03201  0.03491  0.03155  0.84077  1 0.35918
#public_cor     -0.15536  0.64708  0.55041  0.05765  1 0.81026
#physical_vd    -3.06127  0.76923  0.66842 15.83757  1 6.9e-05
#express_vd      2.63870  0.77482  0.67584 11.59782  1 0.00066
#intensity2      0.80672  0.12868  0.11917 39.30533  1 3.6e-10
#info            0.01826  0.00788  0.00619  5.36749  1 0.02052
#pop_ln          0.30013  0.09914  0.08052  9.16537  1 0.00247
#frailty(ccode)                            43.10915 23 0.00663

#Iterations: 7 outer, 35 Newton-Raphson
#Variance of random effect= 0.348   I-likelihood = -425.2 
#Degrees of freedom for terms=  0.9  0.8  0.7  0.8  0.8  0.9  0.6  0.7 23.0 
#Likelihood ratio test=253  on 29 df, p=0
#n=1067 (107 observations deleted due to missingness)
#plot(survfit(res.cox),xlim=c(1992, 2014))

#T-G Test for proportionality 
cox.zph(cox.ano, transform='log')

#                 rho   chisq      p
#seq_ln        0.06343 0.69956 0.4029
#polity2      -0.02158 0.08774 0.7671
#public_cor    0.01390 0.04343 0.8349
##physical_vd -0.06030 0.83672 0.3603
#express_vd    0.00476 0.00478 0.9449
#intensity2   -0.16433 4.68451 0.0304
#info          0.02080 0.10811 0.7423
#pop_ln        0.02787 0.19644 0.6576
#GLOBAL             NA 6.77781 0.5608

#Graph Scoenfeld Residuals
ggcoxdiagnostics(cox.ano, type = "schoenfeld",
                 linear.predictions = FALSE,ggtheme = theme_bw())

#Result: intensity2 does not violate proportionality assumption
#Diagnostics: Overall, model should be good because Global (p >.05). 

#Proportional Hazard
coef <- summary(cox.ano)$coefficients[1, 1]
coef
exp(coef)
#0.7578026
1-0.7578026
#0.2421974; ~24%

##########################
#####Democracy############
##########################

#Estimate model
cox.demo <- coxph(Surv(X_t01, X_t1, X_d1) ~ seq_ln + polity2 + public_cor
                  + physical_vd + express_vd + intensity2 
                  + info + pop_ln + frailty(ccode), na.action=na.exclude, data =  sa.demo, 
                  control=coxph.control(eps=1e-09,iter.max=100,outer.max=100))

cox.demo

#                   coef se(coef)      se2    Chisq   DF       p
#seq_ln          0.13779  0.10841  0.09291  1.61548  1.0  0.2037
#polity2        -0.26736  0.09908  0.08639  7.28206  1.0  0.0070
#public_cor      1.22116  0.61030  0.50049  4.00371  1.0  0.0454
#physical_vd    -4.93416  0.90397  0.75148 29.79301  1.0 4.8e-08
#express_vd      2.51175  1.31053  1.11620  3.67335  1.0  0.0553
#intensity2      0.45043  0.15114  0.12086  8.88227  1.0  0.0029
#info            0.02466  0.00806  0.00618  9.34986  1.0  0.0022
#pop_ln          0.55431  0.08271  0.06536 44.91338  1.0 2.1e-11
#frailty(ccode)                            43.78076 23.5  0.0067

#Iterations: 7 outer, 65 Newton-Raphson
#Variance of random effect= 0.331   I-likelihood = -626.8 
#Degrees of freedom for terms=  0.7  0.8  0.7  0.7  0.7  0.6  0.6  0.6 23.5 
#Likelihood ratio test=570  on 28.9 df, p=<2e-16
#n= 1922, number of events= 201 
#(247 observations deleted due to missingness)

#T-G Test for proportionality 
cox.zph(cox.demo, transform='log')

#                 rho    chisq       p
#seq_ln      -0.07891  2.37157 0.12356
#polity2     -0.13759  6.85699 0.00883
#public_cor  -0.09437  3.82102 0.05061
#physical_vd -0.01062  0.04942 0.82407
#express_vd   0.03995  0.62550 0.42901
#intensity2  -0.04192  0.85319 0.35565
#info        -0.05472  1.46829 0.22562
#pop_ln      -0.00213  0.00203 0.96408
#GLOBAL            NA 17.14707 0.02861

#Graph Scoenfeld Residuals
ggcoxdiagnostics(cox.demo, type = "schoenfeld",
                 linear.predictions = FALSE,ggtheme = theme_bw())

#Result: Model violates proportionality assumption (GLOBAL p<.05). Also, polity2 and public_cor.
#Diagnostics: Address polity2 and public_cor using spline regressions (Keele 2010).

#-#-#-#-#-#-#-#-#-#-#-#-#-#-#-#-#
######### SPLINE THERAPY ########
#-#-#-#-#-#-#-#-#-#-#-#-#-#-#-#-#

#R crashes, so run sperate spline regressions on polity, then public_cor to inspect results
# then run w/ the interactions for manuscript model

#Run nonlinearity tests 
cox.demo.spline.polity <- coxph(Surv(X_t01, X_t1, X_d1) ~ seq_ln + pspline(polity2, df=3)
                                + public_cor
                                + physical_vd + express_vd 
                                + intensity2 
                                + info + pop_ln + frailty(ccode), na.action=na.exclude, data =  sa.demo, 
                                control=coxph.control(eps=1e-09,iter.max=100,outer.max=100))

summary(cox.demo.spline.polity)

#Public Cor
cox.demo.spline.pc<- coxph(Surv(X_t01, X_t1, X_d1) ~ seq_ln + polity2 
                           + pspline(public_cor, df=2)
                           + physical_vd + express_vd 
                           + intensity2 
                           + info + pop_ln + frailty(ccode), na.action=na.exclude, data =  sa.demo, 
                           control=coxph.control(eps=1e-09,iter.max=100,outer.max=100))

summary(cox.demo.spline.pc)

###Run new model w/ time interactions

#create time interactions
t<-sa.demo$year
corpt<-sa.demo$public_cor
polity<-sa.demo$polity2
sa.demo$yearxcorpt <- log(t)*corpt
sa.demo$yearxpolity <- log(t)*polity

cox.demo.spline2 <- coxph(Surv(X_t01, X_t1, X_d1) ~ seq_ln + polity2 + public_cor
                          + physical_vd + express_vd  + intensity2
                          + info + pop_ln +  yearxpolity + yearxcorpt  + frailty(ccode), data =  sa.demo,
                          control=coxph.control(eps=1e-09,iter.max=100,outer.max=100))

cox.demo.spline2

#coef  se(coef)       se2     Chisq   DF       p
#seq_ln          1.44e-01  1.10e-01  9.30e-02  1.71e+00  1.0  0.1905
#polity2         4.80e+02  1.77e+02  1.72e+02  7.39e+00  1.0  0.0066
#public_cor      1.25e+03  8.84e+02  8.51e+02  2.00e+00  1.0  0.1573
#physical_vd    -4.90e+00  9.07e-01  7.52e-01  2.92e+01  1.0 6.5e-08
#express_vd      2.57e+00  1.33e+00  1.12e+00  3.77e+00  1.0  0.0523
#intensity2      4.72e-01  1.55e-01  1.24e-01  9.22e+00  1.0  0.0024
#info            2.48e-02  8.47e-03  6.22e-03  8.55e+00  1.0  0.0035
#pop_ln          5.62e-01  8.42e-02  6.64e-02  4.46e+01  1.0 2.4e-11
#yearxpolity    -6.32e+01  2.33e+01  2.27e+01  7.40e+00  1.0  0.0065
#yearxcorpt     -1.64e+02  1.16e+02  1.12e+02  2.00e+00  1.0  0.1577
#frailty(ccode)                                4.58e+01 24.2  0.0051

#Iterations: 7 outer, 56 Newton-Raphson
#Variance of random effect= 0.355   I-likelihood = -622.9 
#Degrees of freedom for terms=  0.7  1.0  0.9  0.7  0.7  0.6  0.5  0.6  1.0  0.9 24.2 
#Likelihood ratio test=580  on 31.9 df, p=<2e-16
#n= 1922, number of events= 201 
#(247 observations deleted due to missingness)

#Use this to get exact numbers for coefficients
summary(cox.demo.spline2)$coefficients[1, 1]

#T-G Test for proportionality 
cox.zph(cox.demo.spline2, transform='log')

#                rho  chisq      p
#seq_ln      -0.0978  3.795 0.0514
#polity2     -0.1245  2.872 0.0901
#public_cor  -0.0554  0.666 0.4145
#physical_vd  0.0224  0.218 0.6404
#express_vd   0.0200  0.159 0.6900
#intensity2  -0.0234  0.275 0.5998
#info        -0.0715  2.737 0.0981
#pop_ln       0.0309  0.439 0.5074
#yearxcorpt   0.0553  0.665 0.4149
#yearxpolity  0.1245  2.873 0.0901
#GLOBAL           NA 11.409 0.3266

#Graph Scoenfeld Residuals
ggcoxdiagnostics(cox.demo.spline2, type = "schoenfeld",
                 linear.predictions = FALSE,ggtheme = theme_bw())

#Result: After using splines, model/variables do not violate proportionality assumption.
#Diagnostics: Overall, model should be good because Global (p >.05). 

#Create data for Table 10, Appendix#

#########
###ONE###
#########

#How many events?

#Global
one<-survival$X_d1
CrossTable(one) 
#439

#Auto
one<-sa.auto$X_d1
CrossTable(one) 
#48

#Ano
one<-sa.ano$X_d1
CrossTable(one) 
#154

#Demo
one<-sa.demo$X_d1
CrossTable(one) 
#219

#########
###TWO###
#########

#How many events?

#Global
two<-survival$X_d2
CrossTable(two) 
#203

#Auto
two<-sa.auto$X_d2
CrossTable(two) 
#18

#Ano
two<-sa.ano$X_d2
CrossTable(two) 
#68

#Demo
two<-sa.demo$X_d2
CrossTable(two) 
#101

###########
###THREE###
###########

#How many events?

#Global
three<-survival$X_d3
CrossTable(three) 
#119

#Auto
three<-sa.auto$X_d3
CrossTable(three) 
#11

#Ano
three<-sa.ano$X_d3
CrossTable(three) 
#47

#Demo
three<-sa.demo$X_d3
CrossTable(three) 
#52

#Creat figures for Figure 6, appendix#

#Figure 6a-One journalist killed threshold

####Plot three regime types
if (T) {
plot(survfit(cox.auto), col = c('black') , conf.int=F,lwd=4, xlim=c(1990, 2015), ylab = "Survival Probability",
     xlab = "Years",cex.lab=1.75, mgp=c(2,.75,0))
par(new=TRUE)
plot(survfit(cox.ano), col = c('black'), conf.int=F, lty="dotted",xlim=c(1990, 2015), ylab = "Survival Probability",
     xlab = "Years",cex.lab=1.75, mgp=c(2,.75,0))
par(new=TRUE)
plot(survfit(cox.demo.spline2), col = c('black') , conf.int=F, xlim=c(1990, 2015), ylab = "Survival Probability",
     xlab = "Years",cex.lab=1.75, mgp=c(2,.75,0))
legend(1986.5,0.35, c("Autocracies","Anocracies", "Democracies"),
       lty = c("solid","dotted","solid"),lwd=c(4,1,1), col=c("black","black", "black"), 
       cex=2.25, y.intersp = .25,x.intersp = .15,seg.len=.5, bty = "n", adj = c(0, .5))
}
#Delete figure from Plots environment
dev.off(dev.list()["RStudioGD"])

#Figure 6b-Twojournalists killed threshold
cox.auto.two <- coxph(Surv(X_t02, X_t2, X_d2) ~ seq_ln + polity2 + public_cor
                  + physical_vd + express_vd + intensity2 
                  + info + pop_ln + frailty(ccode), na.action=na.exclude, data = sa.auto,      
                  control=coxph.control(eps=1e-09,iter.max=100,outer.max=100))
cox.ano.two <- coxph(Surv(X_t02, X_t2, X_d2) ~ seq_ln + polity2 + public_cor
                 + physical_vd + express_vd + intensity2 
                 + info + pop_ln + frailty(ccode), na.action=na.exclude, data =  sa.ano, 
                 control=coxph.control(eps=1e-09,iter.max=100,outer.max=100))
cox.demo.two <- coxph(Surv(X_t02, X_t2, X_d2) ~ seq_ln + polity2 + public_cor
                  + physical_vd + express_vd + intensity2 
                  + info + pop_ln + frailty(ccode), na.action=na.exclude, data =  sa.demo, 
                  control=coxph.control(eps=1e-09,iter.max=100,outer.max=100))

####Plot three regime types: Two killings threshold
if (T) {
plot(survfit(cox.auto.two), col = c('black') , conf.int=F,lwd=4, xlim=c(1990, 2015), ylab = "Survival Probability",
     xlab = "Years",cex.lab=1.75, mgp=c(2,.75,0))
par(new=TRUE)
plot(survfit(cox.ano.two), col = c('black'), conf.int=F, lty="dotted",xlim=c(1990, 2015), ylab = "Survival Probability",
     xlab = "Years",cex.lab=1.75, mgp=c(2,.75,0))
par(new=TRUE)
plot(survfit(cox.demo.two), col = c('black') , conf.int=F, xlim=c(1990, 2015), ylab = "Survival Probability",
     xlab = "Years",cex.lab=1.75, mgp=c(2,.75,0))
legend(1986.5,0.35, c("Autocracies","Anocracies", "Democracies"),
       lty = c("solid","dotted","solid"),lwd=c(4,1,1), col=c("black","black", "black"), 
       cex=2.25, y.intersp = .25,x.intersp = .15,seg.len=.5, bty = "n", adj = c(0, .5))
}
#Delete figure from Plots environment
dev.off(dev.list()["RStudioGD"])

#Figure 6c-Threejournalists killed threshold

#Autocracies gives warning, but like my Cajun uncle says, "It won't hurt none" 
cox.auto.three <- coxph(Surv(X_t03, X_t3, X_d3) ~ seq_ln + polity2 + public_cor
                  + physical_vd + express_vd + intensity2 
                  + info + pop_ln + frailty(ccode), na.action=na.exclude, data =sa.auto,      
                  control=coxph.control(eps=1e-09,iter.max=100,outer.max=100))
cox.ano.three <- coxph(Surv(X_t03, X_t3, X_d3) ~ seq_ln + polity2 + public_cor
                 + physical_vd + express_vd + intensity2 
                 + info + pop_ln + frailty(ccode), na.action=na.exclude, data = sa.ano, 
                 control=coxph.control(eps=1e-09,iter.max=100,outer.max=100))
cox.demo.three <- coxph(Surv(X_t03, X_t3, X_d3) ~ seq_ln + polity2 + public_cor
                  + physical_vd + express_vd + intensity2 
                  + info + pop_ln + frailty(ccode), na.action=na.exclude, data = sa.demo, 
                  control=coxph.control(eps=1e-09,iter.max=100,outer.max=100))

####Plot three regime types
if (T) {
plot(survfit(cox.auto.three), col = c('black') , conf.int=F,lwd=4, xlim=c(1990, 2015), ylab = "Survival Probability",
     xlab = "Years",cex.lab=1.75, mgp=c(2,.75,0))
par(new=TRUE)
plot(survfit(cox.ano.three), col = c('black'), conf.int=F, lty="dotted",xlim=c(1990, 2015), ylab = "Survival Probability",
     xlab = "Years",cex.lab=1.75, mgp=c(2,.75,0))
par(new=TRUE)
plot(survfit(cox.demo.three), col = c('black') , conf.int=F, xlim=c(1990, 2015), ylab = "Survival Probability",
     xlab = "Years",cex.lab=1.75, mgp=c(2,.75,0))
legend(1986.5,0.35, c("Autocracies","Anocracies", "Democracies"),
       lty = c("solid","dotted","solid"),lwd=c(4,1,1), col=c("black","black", "black"), 
       cex=2.25, y.intersp = .25,x.intersp = .15,seg.len=.5, bty = "n", adj = c(0, .5))
}
#Delete figure from Plots environment
dev.off(dev.list()["RStudioGD"])

################
##  END ~ jas ##
################